/* * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) any later * version. You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, Inc., * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ package org.aitools.programd.parser; import java.io.IOException; import java.io.StringReader; import java.net.URL; import java.util.List; import org.aitools.programd.Core; import org.aitools.programd.processor.Processor; import org.aitools.programd.processor.ProcessorException; import org.aitools.programd.processor.ProcessorRegistry; import org.aitools.util.Classes; import org.aitools.util.resource.URLTools; import org.aitools.util.xml.JDOM; import org.apache.log4j.Logger; import org.jdom.CDATA; import org.jdom.Comment; import org.jdom.Content; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.Text; import org.jdom.input.SAXBuilder; /** * A generic parser that allows us to register processors for any element type. This has been heavily modified * (simplified) to use DOM. * * @param <P> the base class of Processor to be used * @author <a href="mailto:noel@aitools.org">Noel Bush</a> */ abstract public class GenericParser<P extends Processor> { /** * Corrects a tag to use a valid 1-dimensional index, and returns the index. If the index is missing or valid, 1 is * returned. * * @param element the element for which to get a valid 1-dimensional index * @since 4.1.3 * @return a valid 1-dimensional index */ public static int getValid1dIndex(Element element) { // Get a valid 1-dimensional index. try { return Integer.parseInt(element.getAttributeValue("index")); } catch (NumberFormatException e) { return 1; } } /** * Corrects a tag to use a valid 2-dimensional index, and returns the indices. If either index is invalid or missing, * it is set to 1. * * @param element the element for which to get a valid 2-dimensional index * @since 4.1.3 * @return a valid 2-dimensional index */ public static int[] getValid2dIndex(Element element) { String indexValue = element.getAttributeValue("index"); int[] result = { 1, 1 }; // Assign the default if the index attribute is empty. if (indexValue == null || "".equals(indexValue)) { return result; } // If only one dimension is specified, fill in the other as 1. int comma = indexValue.indexOf(','); if (comma < 0) { try { result[0] = Integer.parseInt(indexValue); } catch (NumberFormatException e) { // Nothing to do. } result[1] = 1; return result; } // (otherwise...) try { result[0] = Integer.parseInt(indexValue.substring(0, comma)); } catch (NumberFormatException e) { // Nothing to do. } try { result[1] = Integer.parseInt(indexValue.substring(comma + 1)); } catch (NumberFormatException e) { // Nothing to do. } return result; } private ProcessorRegistry<P> _registry; protected URL _baseURL; protected String _namespaceURI; // Convenience constants. protected Core _core; protected Logger _logger; /** The word "index", for convenience. */ protected static final String INDEX = "index"; /** * Creates a new GenericParser with the given Core as its owner. * * @param registry the registry of processors * @param core the Core that owns this */ public GenericParser(ProcessorRegistry<P> registry, Core core) { this._core = core; this._baseURL = this._core.getBaseURL(); this._logger = this._core.getLogger(); this._registry = registry; this._namespaceURI = this._registry.getNamespaceURI(); } /** * Formats a CDATA section node. * * @param node * @return the formatted CDATA section node */ public static String evaluate(CDATA node) { return String.format("<![CDATA[%s]]>", node.getText()); } /** * Formats a comment. * * @param comment * @return the formatted comment */ public static String evaluate(Comment comment) { return String.format("<!--%s-->", comment.getText()); } /** * Evaluates the given document and returns the result. * * @param document the document to evaluate * @return the result of evaluating the document * @throws ProcessorException if there is an error in processing */ public String evaluate(Document document) throws ProcessorException { return this.evaluate(document.getRootElement()); } /** * Recursively evaluates an element. * * @param element the element * @return the result of processing the element * @throws ProcessorException if there is an error in processing */ @SuppressWarnings("unchecked") public String evaluate(Element element) throws ProcessorException { // Is it a valid element? if (element == null) { return ""; } // Search for the tag in the processor registry. Class<? extends P> processorClass = null; String elementNamespaceURI = element.getNamespaceURI(); Document elementDocument = element.getDocument(); boolean emitXMLNS = elementDocument != null && (element.equals(element.getDocument().getRootElement()) || elementNamespaceURI != null && !elementNamespaceURI.equals(element.getDocument().getRootElement().getNamespaceURI())); if (elementNamespaceURI == null || this._registry.getNamespaceURI().equals(elementNamespaceURI)) { processorClass = this._registry.get(element.getName()); // Process the element with a new instance of the processor. return Classes.getNewInstance(processorClass, "Processor", this._core).process(element, this); } // otherwise (if this element is from a different namespace) if (element.getContent().size() == 0) { return JDOM.renderEmptyElement(element, emitXMLNS); } // otherwise... return JDOM.renderStartTag(element, emitXMLNS) + this.evaluate(element.getContent()) + JDOM.renderEndTag(element); } /** * Evaluates the given content list and returns the result. * * @param list the list of content to evaluate * @return the result of evaluating the given list of nodes * @throws ProcessorException if there is an error in processing */ public String evaluate(List<Content> list) throws ProcessorException { StringBuilder result = new StringBuilder(); for (Content node : list) { // Would be nice not to have to do this: if (node instanceof Element) { result.append(this.evaluate((Element) node)); } else if (node instanceof Text) { result.append(GenericParser.evaluate((Text) node)); } else if (node instanceof CDATA) { result.append(GenericParser.evaluate((CDATA) node)); } else if (node instanceof Comment) { result.append(GenericParser.evaluate((Comment) node)); } else { assert false : "Unknown subclass of jdom.org.Content!"; } } return result.toString(); } /** * Returns formatted text. * * @param text * @return the text */ public static String evaluate(Text text) { return text.getText(); } /** * @return the Core */ public Core getCore() { assert this._core != null : "Tried to get the Core from a GenericParser that does not have one!"; return this._core; } /** * Processes whatever is at the given URL, returning nothing. * * @param url the URL at which to find whatever is to be processed * @throws ProcessorException if there is a problem processing whatever is at the given URL */ public void process(URL url) throws ProcessorException { this._logger.info(String.format("Loading \"%s\".", URLTools.unescape(url))); this.processResponse(url); } /** * Processes an XML fragment provided in a string. This version of the method does <i>not</i> set a base URI for the * document fragment. * * @param input the string from which to create the document fragment * @return the result of processing the document fragment created from the given string * @throws ProcessorException if there was a problem processing the document fragment created from the given string * @throws JDOMException * @throws IOException */ public String processResponse(String input) throws ProcessorException, JDOMException, IOException { return this.evaluate(new SAXBuilder().build(new StringReader(input))); } /** * Processes an XML fragment provided in a string. * * @param input the string from which to create the document fragment * @param baseURI the base URI to set for the document fragment * @return the result of processing the document fragment created from the given string * @throws ProcessorException if there was a problem processing the document fragment created from the given string * @throws JDOMException * @throws IOException */ public String processResponse(String input, String baseURI) throws ProcessorException, JDOMException, IOException { Document document = new SAXBuilder().build(new StringReader(input)); document.setBaseURI(baseURI); return this.evaluate(document); } /** * Processes whatever is at the given URL and returns a response. * * @param url where to find what is to be processed * @return the result of processing whatever is found at the URL * @throws ProcessorException if there is a problem processing what is found at the URL */ public String processResponse(URL url) throws ProcessorException { return this.evaluate(JDOM.getDocument(url, this._core.getSettings().getXmlCatalogPath(), this._logger)); } /** * <p> * Creates a "mini-template" with a given tag and an optional child tag, then evaluates it recursively. * </p> * <p> * This method is used to map certain tags as combinations of other tags (as in <a * href="http://aitools.org/aiml/TR/2001/WD-aiml/#section-short-cut-elements">short-cut elements </a>). * </p> * * @param element the element to modify * @param newElementName the new name to give the element * @param childContent the name or content for the child to add * @param childType the type of the child * @return the result of processing this structure * @throws ProcessorException if there is an error in processing */ public String shortcutTag(Element element, String newElementName, String childContent, Class<? extends Content> childType) throws ProcessorException { String response = ""; // If the node is empty, we need not continue. if (element == null) { return ""; } /* * Process children (if any). Clearly, the root tag cannot have an empty type, and the children must exist. */ if (!"".equals(childContent) && (childType == Element.class || childType == Text.class)) { Element newElement = new Element(newElementName, element.getNamespaceURI()); /* * Create an XML node for the child tag. Note that we assume that the child is an empty tag with no attributes. * This is reasonable for AIML 1.0.1, but might not always be. */ if (childType == Element.class) { newElement.addContent(new Element(childContent, element.getNamespaceURI())); } else if (childType == Text.class) { newElement.setText(childContent); } // Now evaluate the node, just as if it came from the original AIML. response = response + this.evaluate(newElement); } return response; } }